
*********************************************************************************
*Appendix Tables 1-3: OLS regression models for migration status, years of
*                     education, labor supply, work time and wages
*********************************************************************************

*Session setup: pin the interpreter version, move to the data folder, open the
*log and load the imputed data set.
*The global macro mypath is referenced below but is not defined in this part of
*the file; it must be set before the script runs.
version 15.1
cd "${mypath}\CHNS_project\01_data\02_posted\"

*close any log left open by an earlier run that stopped before reaching
*log close; without this, log using aborts with "log file already open"
capture log close
log using "${mypath}\CHNS_project\03_log_files\08_appendix_table_1_3.log", replace

use CHNS_1993_2015_20_imputed.dta,clear
*convert the ice-format imputations to the official mi format
mi import ice, automatic

*generate exclusion condition, that is, people who work in agriculture in urban area
*defined only for be_employed == 1; all other rows are left missing
*NOTE(review): a missing migrant_group satisfies migrant_group != 2 - confirm this is intended
gen be_farmer = (migrant_group != 2 & occupation == 3 & work_unit == 4) if be_employed == 1

*generate "over the statutory hours last week" (more than 44 hours)
*Stata treats missing as larger than any number, so a missing hours_lastw1
*would be coded as 1; such rows are excluded and stay missing instead
gen over_lastw = hours_lastw1 > 44 if employed_last1 == 1 & !missing(hours_lastw1)

*generate annual salary last year(log)
*wage and bonus are taken out of logs, combined as wage*workmonths1 + bonus, and logged again
gen log_wage_i = log(exp(log_wage_1)*workmonths1+exp(log_bonuse_1)) if employed_last1 == 1

*generate hourly wage last year(log)
*365/12/7 is the average number of weeks per month
*presumably workdays1 is days per week and workhours1 is hours per day - TODO confirm
gen log_hour_wage = log_wage_1-log((365/12/7)*workdays1*workhours1) if employed_last1 == 1
     
*dependent variables, numbered in order as outcome_1 ... outcome_8
local k = 0
foreach dep in be_migrant year_education be_employed workmonths1 hours_lastw1 over_lastw log_wage_i log_hour_wage {
	local ++k
	local outcome_`k' "`dep'"
}

*key independent variables
*inde_1: survey-year dummies only
*inde_2: year dummies, be_migrant, and be_migrant interacted with every year dummy
local inde_1 "year1997 year2000 year2004 year2006 year2009 year2011 year2015"
local inde_2 "`inde_1' be_migrant"
foreach wave of local inde_1 {
	local inde_2 "`inde_2' be_migrant##`wave'"
}

*control variables
*control_1 and control_2 are identical except that control_2 leaves out
*year_education (it is used where year_education is the dependent variable)
local ctrl_head "age26_30 age31_35 age36_40 age41_45 age46_50 age51_55 gender ethnicity"
local ctrl_tail "marital_status h_group1 h_group2 h_group4 h_group5 num_child num_elderly population pcgdp child_elder percent_farmer service_sector Liaoning Heilongjiang Shandong Henan Hubei Hunan Guangxi Guizhou"
local control_1 "`ctrl_head' year_education `ctrl_tail'"
local control_2 "`ctrl_head' `ctrl_tail'"

*output folder for the csv tables
local path "${mypath}\CHNS_project\04_tables\appendix_table_1_3\"

*Appendix Table 1: linear regressions on the multiply-imputed data for
*migration status (outcome_1) and years of education (outcome_2).
*Each outcome is fitted three times: on all observations, on
*degree_education <= 3 and on degree_education > 3.
*Every model drops time == 1 and rows with be_farmer == 1; rows where
*be_farmer is missing pass the be_farmer != 1 test and are kept.
*Standard errors are clustered on IDind.

*extra sample restriction appended to the if-clause (nothing for "all")
local edu_all  ""
local edu_low  "& degree_education <= 3"
local edu_high "& degree_education > 3"

*migration status: year dummies plus the full control set
foreach grp in all low high {
	quietly eststo: mi estimate, post esampvaryok: regress `outcome_1' `inde_1' `control_1' if time != 1 & be_farmer != 1 `edu_`grp'', robust cluster(IDind) noomitted
	estimates store outcome1_`grp', title(outcome1_`grp')
}

*years of education: adds be_migrant and its year interactions, uses the
*control set without year_education, and keeps only be_rural_resident == 0
foreach grp in all low high {
	quietly eststo: mi estimate, post esampvaryok: regress `outcome_2' `inde_2' `control_2' if be_rural_resident == 0 & time != 1 & be_farmer != 1 `edu_`grp'', robust cluster(IDind) noomitted
	estimates store outcome2_`grp', title(outcome2_`grp')
}

*write the six models side by side to the csv file
esttab outcome1_all outcome1_low outcome1_high outcome2_all outcome2_low outcome2_high ///
	using "`path'01_appendix_table1.csv", ///
	constant b(3) star(+ 0.10 * 0.05 ** 0.01) se(%9.3f) r2 replace nogap nonumbers noomitted stats(N_mi)

*Appendix Tables 2 and 3: linear regressions for the work-related outcomes
*(outcome_3 ... outcome_8) on the multiply-imputed data.
*All models use be_migrant with its year interactions (inde_2) and the full
*control set, and are restricted to be_rural_resident == 0, time != 1,
*be_farmer != 1 (missing be_farmer is kept) and degree_education <= 3.
*Standard errors are clustered on IDind.

*employment status: fitted without the employed_last1 restriction
quiet eststo: mi estimate,post esampvaryok: reg `outcome_3' `inde_2' `control_1' if be_rural_resident == 0 & time != 1 & be_farmer !=  1 & degree_education <= 3,r cluster(IDind) noomitted
*title now matches the stored name (it was outcome1)
est store outcome3,title(outcome3)

*outcomes 4-8 are fitted only for rows with employed_last1 == 1
forv i = 4/8 {
	quiet eststo: mi estimate,post esampvaryok: reg `outcome_`i'' `inde_2' `control_1' if employed_last1==1 & be_rural_resident == 0 & time != 1 & be_farmer !=  1 & degree_education <= 3,r cluster(IDind) noomitted
	est store outcome`i',title(outcome`i')
}

*mi estimate documents the number of observations as e(N_mi), so the tables
*request N_mi, the same statistic used for appendix table 1
*table 2: outcomes 3-6; table 3: the two log wage outcomes (7 and 8)
esttab outcome3 outcome4 outcome5 outcome6 using "`path'02_appendix_table2.csv", constant b(3) star(+ 0.10 * 0.05 ** 0.01) se(%9.3f) r2 replace nogap nonumbers noomitted stats(N_mi)
esttab outcome7 outcome8 using "`path'03_appendix_table3.csv", constant b(3) star(+ 0.10 * 0.05 ** 0.01) se(%9.3f) r2 replace nogap nonumbers noomitted stats(N_mi)

log close
